This document provides the code for the analyses reported in:
This script cleans the self/other task data for the analyses reported in supplementary material.
# define variables and paths
datadir = "/Volumes/psych-cog/dsnlab/SFIC/behavior/task/"
sub_pattern = "L[0-3]{1}([0-9]{2})"
waves = list.files(datadir, pattern = "t[1-3]{1}")
# initialize data frame
data = data.frame()
# load data
for (wave in waves) {
wavedir = paste0(datadir, wave, "/RTs_txt/")
subs = list.files(wavedir, pattern = sub_pattern)
for (sub in subs) {
filename = paste0(wavedir, sub)
tmp = tryCatch(read.table(filename, header = FALSE, sep = "\t", fill = TRUE, skipNul = TRUE) %>%
mutate(file = filename,
wave = wave,
V1 = str_replace(V1, "\\*", "000")) %>%
extract(file, "subjectID", sub_pattern) %>%
mutate(subjectID = paste0("s0", subjectID),
subFile = sub) %>%
extract(V1, "script", ".*000(.*)_.*", remove = FALSE) %>%
extract(V1, "buttons", "([a-zA-Z0-9= ]+).*000.*", remove = FALSE) %>%
mutate(order = ifelse(grepl("vp|vn|pp|pn", V4), .$script[grep("hv", script)], NA),
yn.key = ifelse(grepl("vp|vn|pp|pn", V4), .$buttons[grep("1|2|3|4|g|r|b|y", buttons)], NA)) %>%
filter(!is.na(order)) %>%
filter(!V1 == "\026\001\020\f\003\f\b8list\001reco\030\001pjstenum\004deft") %>%
rename("rt" = V2,
"trial.end" = V3,
"stimulus" = V4,
"response" = V5) %>%
mutate(trial = row_number()) %>%
mutate_all(as.character) %>%
select(subFile, subjectID, wave, order, yn.key, trial, rt, trial.end, stimulus, response),
error = function(e) NULL)
data = bind_rows(data, tmp)
}
}# load
age = read.csv("/Volumes/psych-cog/dsnlab/SFIC_data_sheets/SFIC_age.pds.gender.csv") %>%
rename("subjectID" = SID,
"wave" = wavenum,
"gender" = Gender) %>%
mutate(subjectID = sprintf("s%03d", subjectID),
wave = paste0("t", wave)) %>%
select(-c(DOB, session))
# merge
data = data %>%
left_join(., age)# check number of trials for all participants
data %>%
group_by(subjectID, wave) %>%
summarise(n.trials = n()) %>%
arrange(desc(n.trials))# check irregular number of trials
data %>%
filter(subjectID %in% c("s030", "s035", "s018", "s027", "s063", "s081")) %>%
group_by(subFile, subjectID, wave) %>%
summarise(n.trials = n()) %>%
arrange(desc(n.trials))hpsphvsvhpsphvsv and lacks responseshvsvhpsp and has responses –> this is a copy of s019 t3 (timing and responses match)hpsphvsv and has responses –> using this runL235 -2 has the correct order hpsphvsv –> using L235 -2 (second start; yn key = 12)L318-928226 is the subject ID listed –> using L318-928226.txt! as a response for many of the trials –> using second runs030_t3 = data %>%
filter(subjectID == "s030" & wave == "t3") %>%
slice(-1:-160) %>%
mutate(order = "hpsphvsv") %>%
mutate(trial = as.character(row_number()))
s035_t2 = data %>%
filter(subFile == "L235 - 2.txt") %>%
slice(-1:-16) %>%
mutate(trial = as.character(row_number()),
yn.key = "12")
s027_t1 = data %>%
filter(subjectID == "s027" & wave == "t1") %>%
slice(-1:-80) %>%
mutate(trial = as.character(row_number()),
yn.key = "34")
s063_t1 = data %>%
filter(subjectID == "s063" & wave == "t1") %>%
slice(-1:-19) %>%
mutate(trial = as.character(row_number()),
yn.key = "34")
s081_t3 = data %>%
filter(subjectID == "s081" & wave == "t3") %>%
slice(-1:-16) %>%
mutate(trial = as.character(row_number()))
# filter out subjects original data and then bind subsetted data
data1 = data %>%
filter(!subFile == "L318-085822.txt") %>%
filter(!(subjectID == "s030" & wave == "t3")) %>%
filter(!(subjectID == "s035" & wave == "t2")) %>%
filter(!(subjectID == "s027" & wave == "t1")) %>%
filter(!(subjectID == "s063" & wave == "t1")) %>%
filter(!(subjectID == "s081" & wave == "t3")) %>%
bind_rows(., s030_t3) %>%
bind_rows(., s035_t2) %>%
bind_rows(., s027_t1) %>%
bind_rows(., s063_t1) %>%
bind_rows(., s081_t3)
# check number of rows
nrow(data1) == nrow(data) - 80 - 160 - 96 - 80 - 19 - 16## [1] TRUE
Manual changes to some text files were made to facilitate loading the data into r. When the yes/no key was on a separate line from the condition order, the line was removed. When strange characters were included before the yes/no key they were removed.
For all other errors, these will be addressed directly in r. The original, raw data is still intact as binary files in SFIC/behavor/[wave]/RTs.
## [1] "grrg"
## [2] "gr"
## [3] "rg"
## [4] "grrggggggggggggg"
## [5] NA
## [6] "43"
## [7] "34"
## [8] "grrggr"
## [9] "grr"
## [10] "3333333334"
## [11] "3434"
## [12] "4=no 3=yes"
## [13] "34343434"
## [14] "344"
## [15] "12"
## [16] "1212121212121212112222211121212121212121212121212111112"
## [17] "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"
## [18] "3=yes 4=no"
## [19] "by"
## [20] "122"
## [21] "b12"
## [22] "121212"
## [23] "121"
## [24] "2"
## [25] "1212"
## [26] "111222"
## [27] "bb12"
## [28] "b"
# select responses to recode
yn.gr = c("grrggggggggggggg", "grrggr", "grr")
yn.34 = c("3333333334", "3434", "4=no 3=yes", "34343434", "344", "3=yes 4=no")
yn.12 = c("1212121212121212112222211121212121212121212121212111112", "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", "122", "b12", "121212", "121", "2", "1212", "111222", "bb12", "b")
# recode responses
data2 = data1 %>%
mutate(yn.key = ifelse(yn.key %in% yn.gr, "gr",
ifelse(yn.key %in% yn.34, "34",
ifelse(yn.key %in% yn.12, "12", yn.key))),
yn.key = ifelse(wave == "t1" & subjectID %in% c("s002"), "rg",
ifelse(wave == "t2" & subjectID == "s065", "12", yn.key)))
# check keys
unique(data2$yn.key)## [1] "rg" "gr" NA "43" "34" "12" "by"
# summarize response types by wave
data2 %>%
group_by(yn.key, wave) %>%
summarize(n = n()) %>%
arrange(wave) %>%
select(wave, everything())# summarize response types by subject and wave
data2 %>%
group_by(subjectID, yn.key, wave) %>%
summarize(n = n()) %>%
arrange(wave) %>%
select(wave, everything())data2.g0 = data2 %>%
mutate(rt = as.numeric(rt)) %>%
select(subjectID, wave, rt, response)
data2.g0 %>%
select(rt) %>%
filter(!rt == 0) %>%
psych::describe(quant = c(.25, .75))# plot RTs for all responses
data2.g0 %>%
mutate(`previous missed` = ifelse(lag(rt) == 0, "yes", "no")) %>%
filter(!is.na(`previous missed`) & !rt == 0) %>%
ggplot(aes("", rt, color = `previous missed`)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = .1) +
facet_grid(~wave) +
labs(x = "") +
theme_minimal()# plot RTs by response
data2.g0 %>%
mutate(`previous missed` = ifelse(lag(rt) == 0, "yes", "no"),
n.char = nchar(response)) %>%
filter(!is.na(`previous missed`) & !rt == 0) %>%
ggplot(aes("", rt, color = `previous missed`)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = .1) +
facet_grid(n.char ~wave) +
labs(x = "") +
theme_minimal()# plot RTs for the first time hearing each stimulus only
data %>%
mutate(rt = as.numeric(rt),
`previous missed` = ifelse(lag(rt) == 0, "yes", "no"),
n.char = nchar(response),
trial = as.numeric(trial),
keep = ifelse(trial %in% c(1:20, 41:60), 1, 0)) %>%
filter(!is.na(`previous missed`) & !rt == 0 & keep == 1) %>%
ggplot(aes("", rt, color = `previous missed`)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = .1) +
facet_grid(~wave) +
labs(x = "") +
theme_minimal()# density plots
data %>%
mutate(rt = as.numeric(rt),
`previous missed` = ifelse(lag(rt) == 0, "yes", "no"),
n.char = nchar(response),
trial = as.numeric(trial),
keep = ifelse(trial %in% c(1:20, 41:60), 1, 0)) %>%
filter(!is.na(`previous missed`) & !rt == 0 & keep == 1) %>%
ggplot(aes(rt, fill = `previous missed`)) +
geom_density(alpha = .75) +
facet_grid(~wave) +
labs(x = "") +
theme_minimal()# plot RTs of trials where the previous trial was missed and there are 2 responses
data2.g0 %>%
group_by(subjectID, wave) %>%
mutate(n.char = nchar(response)) %>%
filter(lag(rt) == 0 & n.char > 1) %>%
ggplot(aes("", rt)) +
geom_violin() +
geom_jitter(width = 0.1, alpha = .25) +
facet_grid(~wave) +
labs(x = "") +
theme_minimal()# plot RTs of trials where the previous trial was missed is one response
data2.g0 %>%
group_by(subjectID, wave) %>%
mutate(n.char = nchar(response)) %>%
filter(lag(rt) == 0 & n.char == 1) %>%
ggplot(aes("", rt)) +
geom_violin() +
geom_jitter(width = 0.1, alpha = .25) +
facet_grid(~wave) +
labs(x = "") +
theme_minimal()## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 0.0000 0.0000 0.6774 1.7660 1.9300 2.0590 2.1840 2.3110 2.4602 2.6491 2.9740
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 0.0000 0.0000 1.4990 1.7757 1.9016 2.0280 2.1494 2.2610 2.4062 2.6161 2.9790
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 0.0000 0.1270 1.6150 1.7787 1.8940 1.9890 2.0810 2.1970 2.3372 2.5090 2.9670
There are missing responses and multiple responses.
Coding rules:
(data3 = data2 %>%
mutate(rt = as.numeric(rt),
trial = as.numeric(trial),
quick = ifelse(rt <= 1 & rt > 0, 1, 0),
n.char = nchar(response),
first = substring(response, 1, 1),
second = substring(response, 2, 2),
third = substring(response, 3, 3)))# describe rts
data3 %>%
mutate(rt.bin = ifelse(rt > 0 & rt <= .25, "lt.25",
ifelse(rt > .25 & rt <= .5, "lt.5",
ifelse(rt > .5 & rt <= .75, "lt.75",
ifelse(rt > .75 & rt <= 1, "lt.1", "gt.1"))))) %>%
group_by(rt.bin, wave) %>%
summarize(n = n()) %>%
spread(wave, n)# describe average RT by subject
data3 %>%
filter(rt > 0) %>%
group_by(subjectID, wave) %>%
summarize(q10 = quantile(rt, .1, na.rm = TRUE),
q25 = quantile(rt, .25, na.rm = TRUE),
q50 = quantile(rt, .5, na.rm = TRUE),
mean.rt = mean(rt, na.rm = TRUE),
threshold = mean.rt - sd(rt, na.rm = TRUE))# summarize issues
data3 %>%
group_by(subFile) %>%
mutate(issue = ifelse(response == "" & lead(rt) < .5 & lead(n.char) == 1, "missed, RT <.5, 1 response",
ifelse(response == "" & lead(rt) >= .5 & lead(rt) < .75 & lead(n.char) == 1, "missed, .5< RT <.75, 1 response",
ifelse(response == "" & lead(rt) >= .75 & lead(rt) < 1 & lead(n.char) == 1, "missed, .75< RT <1, 1 response",
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 2, "missed, RT <1, 2 responses",
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 3, "missed, RT <1, 3 responses",
ifelse(lead(rt) < 1 & lead(n.char) == 1, "not missed, RT <1, 1 response",
ifelse(lead(rt) < 1 & lead(n.char) > 1, "not missed, RT <1, >1 response", NA)))))))) %>%
group_by(issue, wave) %>%
summarize(n = n()) %>%
spread(wave, n)# check potential recoding responses
data.check = data3 %>%
group_by(subFile) %>%
mutate(response.fixed = ifelse(lag(response) == "" & rt <= .75 & n.char == 1, "", response), #missed, RT <.75, 1 response
response.fixed = ifelse(response == "" & lead(rt) <= .75 & lead(n.char) == 1, lead(first), #missed, RT <.75, 1 response
ifelse(response == "" & lead(rt) > .75 & lead(rt) < 1 & lead(n.char) == 1, response, #missed, .75< RT <1, 1 response
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 2, lead(first), #missed, RT <1, 2 responses
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 3, lead(first), #missed, RT <1, 3 responses
ifelse(n.char == 2, second, #2 responses
ifelse(n.char == 3, third, #3 responses
ifelse(trial == 80, response, 9999))))))), #response
response.fixed = ifelse(response == "" & lead(rt) > .75 & lead(rt) < 1 & lead(n.char) == 1, "00", response.fixed), #missed, .75< RT <1, 1 response
response.check = ifelse(response.fixed == 9999, 0, 1),
response.check = ifelse(lag(response.check) == 1, 1, response.check)) %>%
filter(response.check == 1)
# recode responses
data4 = data3 %>%
group_by(subFile) %>%
mutate(response.fixed = ifelse(lag(response) == "" & rt <= .75 & n.char == 1, "", response), #missed, RT <.75, 1 response
response.fixed = ifelse(response == "" & lead(rt) <= .75 & lead(n.char) == 1, lead(first), #missed, RT <.75, 1 response
ifelse(response == "" & lead(rt) > .75 & lead(rt) < 1 & lead(n.char) == 1, response, #missed, .75< RT <1, 1 response
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 2, lead(first), #missed, RT <1, 2 responses
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 3, lead(first), #missed, RT <1, 3 responses
ifelse(n.char == 2, second, #2 responses
ifelse(n.char >= 3, third, #3 responses
ifelse(trial == 80, response, response.fixed))))))))
# summarize affected trials
data3 %>%
group_by(subFile) %>%
mutate(issue = ifelse(response == "" & lead(rt) <= .75 & lead(n.char) == 1, "issue",
ifelse(response == "" & lead(rt) > .75 & lead(rt) < 1 & lead(n.char) == 1, "issue",
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 2, "issue",
ifelse(response == "" & lead(rt) < 1 & lead(n.char) == 3, "issue", "no issue"))))) %>%
filter(!is.na(issue)) %>% #filter out missing last trials
group_by(issue, wave) %>%
summarize(n = n()) %>%
group_by(wave) %>%
mutate(percent = round((n / sum(n)) * 100, 1)) %>%
group_by(issue) %>%
mutate(n_issue = sum(n)) %>%
ungroup() %>%
mutate(percent_issue = round((664 / (664 + 11959)) * 100, 1))## [1] "" "g" "r" NA "3" "4" "1" "2" "y" "b"
# get percentage of missed trials
(missed = data4 %>%
mutate(response.fixed = ifelse(response.fixed %in% c("g", "r", "3", "4", "1", "2", "b", "y"), "response", NA)) %>%
group_by(subjectID, wave, response.fixed) %>%
summarize(n = n()) %>%
spread(response.fixed, n) %>%
mutate(percent = round((`<NA>` / 80) * 100, 1)) %>%
arrange(desc(percent)))## [1] "rg" "gr" NA "43" "34" "12" "by"
missing.yn = data4 %>%
filter(is.na(yn.key))
data5 = data4 %>%
mutate(yn.key = ifelse(is.na(yn.key) & wave == "t1" & subjectID == "s017" , "gr",
ifelse(is.na(yn.key) & wave == "t2" & subjectID == "s018" , "12",
ifelse(is.na(yn.key) & wave == "t2" & subjectID == "s042" , "12",
ifelse(is.na(yn.key) & wave == "t3", "12", yn.key)))),
response.yn = ifelse(yn.key == "gr" & response.fixed == "g", "yes",
ifelse(yn.key == "gr" & response.fixed == "r", "no",
ifelse(yn.key == "rg" & response.fixed == "r", "yes",
ifelse(yn.key == "rg" & response.fixed == "g", "no",
ifelse(yn.key == "43" & response.fixed == "4", "yes",
ifelse(yn.key == "43" & response.fixed == "3", "no",
ifelse(yn.key == "34" & response.fixed == "3", "yes",
ifelse(yn.key == "34" & response.fixed == "4", "no",
ifelse(yn.key == "12" & response.fixed == "1", "yes",
ifelse(yn.key == "12" & response.fixed == "2", "no", NA)))))))))))
unique(data5$response.yn)## [1] NA "no" "yes"
## [1] "hvsvhpsp" "svhvsphp" "hpsphvsv" "sphpsvhv"
data6 = data5 %>%
mutate(trial = as.numeric(trial),
target = ifelse(grepl("^h", order) & trial %in% c(1:20, 41:60), "other",
ifelse(grepl("^h", order) & trial %in% c(21:40, 61:80), "self",
ifelse(grepl("^s", order) & trial %in% c(1:20, 41:60), "self",
ifelse(grepl("^s", order) & trial %in% c(21:40, 61:80), "other", NA)))),
domain = ifelse(grepl("^v", stimulus), "academic", "social"),
valence = ifelse(grepl("vp|pp", stimulus), "positive", "negative"))
unique(data6$target)## [1] "other" "self"
## [1] "academic" "social"
## [1] "positive" "negative"
# social only
data.status %>%
filter(target == "self" & domain == "social") %>%
arrange(task_percent)# for participants missing yn.key
data.status %>%
filter(target == "self" & ((subjectID %in% missing.buttons.t1 & wave == "t1") | (subjectID %in% missing.buttons.t2 & wave == "t2") | (subjectID %in% missing.buttons.t3 & wave == "t3"))) %>%
arrange(task_percent)data.status %>%
filter(target == "self" & domain == "social" & ((subjectID %in% missing.buttons.t1 & wave == "t1") | (subjectID %in% missing.buttons.t2 & wave == "t2") | (subjectID %in% missing.buttons.t3 & wave == "t3")))data.status %>%
filter(target == "self" & ((subjectID %in% missing.buttons.t1 & wave == "t1") | (subjectID %in% missing.buttons.t2 & wave == "t2") | (subjectID %in% missing.buttons.t3 & wave == "t3"))) %>%
arrange(subjectID)